Statistical Methods

We superimposed the figures (the set of landmarks for each fish) using a two-step procedure. First, we used a two-point registration (Bookstein, xxx), with LM-01 and LM-06 to align each fish in an orientation that make the relative coordinate values functionally interpretable. Second, we use a generalized resistant fit (GRF) to re-superimpose the figures for subsequent analysis. For the GRF, we excluded LM-13 as the relative position of the tip of the caudal fin is strongly determined by preservation artifacts. We used the length of the caudal fin, measured as the distance between LM-06 and LM-13, as the x-coordinate for LM-13 in all subsequent analysis. We use a value of zero for the value of the y-coordinate for LM-13 in figures but not in any statistical analysis.

Both Flow and Predator treatments had effects on shape and size. To measure effects of flow on guppy body shape and size, we fit the regression model M1: \(site \times flow\) to each shape coordinate, where \(site\) is the sampling locality of the wildtype grandparent fish and \(flow\) is limited to the “No Flow” and “Flow” treatments (the “Flow + predators” treatment is excluded). Model M1 is size-free but not allometry-free, that is some of the variation in each shape coordinate is associated with size. Consequently, we also fit the regression model M2: \(site \times flow + size\), where \(size\) is the median size of each fish computed from the GRF.

Setup

geom_ancova_grouped <- function(m1, group_by = "pred"){
  geom_smooth(method = "lm",
              mapping = aes(y = predict(m1),
                            linetype = get(group_by))
  )
}

Import and wrangle

Import and wrangle raw coordinates

  1. Import raw coordinate values
data_folder <- "data/ghalambor"
file_name <- "allff.raw.txt"
file_path <- here(data_folder, file_name)
raw_lm <- fread(file_path) %>%
  clean_names()
raw_lm[, caudal_fin := sqrt((c_fx-x6)^2 + (cfy-y6)^2)]
raw_lm[, caudal_fin2 := c_fx-x6]
# temp <- raw_lm[dataset == "greenhouse",
#                .SD,
#                .SDcols = c("id", "gen", "treatment", "caudal_fin", "caudal_fin2")]
  1. what do raw coords look like?
raw_long <- melt(raw_lm,
              id.vars = c("id", "dataset"),
              measure.vars = list(paste0("x", 1:12),
                                  paste0("y", 1:12)),
              variable.name = c("landmark"),
              value.name = c("x", "y")
              )
qplot(x = x, y = y, data = raw_long) +
  coord_fixed()

  1. Superimpose using TPR to get into standard orientation
axis_levels <- c("x", "y")
lm_levels <- 1:12
lm_cols <- do.call(paste0, expand.grid(axis_levels, lm_levels))
lm_cols_x13 <- c(lm_cols, "x13")
lm_cols_13 <- c(lm_cols_x13, "y13")

lm_mat <- raw_lm[, .SD, .SDcols = lm_cols] %>%
  as.matrix()
lm_array <- matrix_2_array(lm_mat)
plot_shapes(lm_array)

lm_tpr <- tpr(lm_array, 1, 6)
plot_shapes(lm_tpr)

  1. Superimpose using GRF
lm_grf <- grf(lm_tpr)
plot_shapes(lm_grf)

  1. Merge imported and grf fit data
lm_grf_dt <- array_2_matrix(lm_grf) %>%
  data.table
colnames(lm_grf_dt) <- lm_cols
keep_cols <- setdiff(names(raw_lm), lm_cols)
shape <- cbind(raw_lm[, .SD, .SDcols = keep_cols],
                lm_grf_dt)
setnames(shape, "treatment", "drainage_treatment")
  1. Add size
centroid_size <- function(coords){
  cols <- 1:length(coords)
  even_col <- cols[lapply(cols, "%%", 2) == 0]
  odd_col <- cols[lapply(cols, "%%", 2) != 0]
  x <- coords[even_col]
  y <- coords[odd_col]
  xbar <- mean(x)
  ybar <- mean(y)
  cs <- sqrt(sum((x-xbar)^2 + (y-ybar)^2))
  return(cs)
}

cs <- apply(lm_mat, 1, centroid_size)
shape[, centroid_size := cs]

n <- dim(lm_array)[3]
for(i in 1:n){
  fig_i <- lm_array[,,i]
  shape[i, median_size := median_size(fig_i)]
}
shape[, x13 := mean(x6) + caudal_fin/median_size]
  1. Merge drainage data
data_folder <- "data/ghalambor"
file_name <- "stream to drainage map.txt"
file_path <- here(data_folder, file_name)
drainage_map <- fread(file_path) %>%
  clean_names()

shape1 <- merge(shape, drainage_map,
                by = "stream",
                sort = FALSE)
shape <- shape1
  1. create stream5 col
shape[, stream5 := substr(stream_id, 1, 5) %>% tolower()]
shape[, site := paste(stream5, tolower(pred_state), sep = "-")]

shape[, stream_id := factor(stream_id)]

Reshape shape data

  1. Reshape the shape data wide to long, stacking the x coordinates into a single x column and y coordinates into a single y column.
# names(shape_wide)
shape_long <- melt(shape,
              id.vars = c("id", "dataset", "stream_id",
                          "stream", "pred_state", "gen",
                          "drainage_treatment"),
              measure.vars = list(paste0("x", 1:13),
                                  paste0("y", 1:12)),
              variable.name = c("landmark"),
              value.name = c("x", "y")
              )
  1. A simple view of the coordinates to see what we have.
# unique(shape_wide$landmark)
qplot(x = x, y = y, data = shape_long) +
  coord_fixed()

  1. Create a column with the corrected treatment level
# unique(shape$dataset)
shape[dataset == "wild", treatment := "Wild"]
shape[dataset == "garden", treatment := "No Flow"]
shape[dataset == "greenhouse" &
        drainage_treatment == "pike", treatment := "Flow + pike"]
shape[dataset == "greenhouse" &
        drainage_treatment == "no_pike", treatment := "Flow"]
  1. Change quII to quar. In the garden dataset, stream_id includes “quII”. In the greenhouse dataset, stream_id includes “quar”. Are these the same?
unique(shape$stream_id)
##  [1] "arimaRi"       "AripoIn"       "AripoDC"       "ElCedroMain"  
##  [5] "ElCedroDavid"  "ElCedroIn"     "GuanapoMain"   "JordanTrib"   
##  [9] "Marianne"      "MarianneTrib"  "Mauzica"       "Oropuche"     
## [13] "Paria"         "Quare6"        "Quare6_2"      "QuareII"      
## [17] "TurareHigh"    "TurareMed"     "turaretriblow" "turaremedpred"
## [21] "yaratrib"      "ardc"          "arin"          "guan"         
## [25] "orop"          "quII"          "turu"          "quar"
shape[stream_id == "quII", stream_id := "quar"]

lab-reared

create lab-reared subset

f2 <- shape[gen != "f0"]

# recode treatment
recode <- FALSE
if(recode == TRUE){
  f2[, treatment_old := treatment]
  f2[gen == "p03", treatment := "Flow + pike"]
  f2[gen == "p01", treatment := "Flow"]
}

flow_levels <- c("No Flow", "Flow")
pred_levels <- c("Flow", "Flow + pike")
treatment_levels <- union(flow_levels, pred_levels)
treatment_tank_levels <- c("No Flow f2", "Flow p02", "Flow p03",
                           "Flow + pike p01", "Flow + pike p04")
# recoded
if(recode == TRUE){
  treatment_tank_levels <- c("No Flow f2", "Flow p01", "Flow p02", "Flow + pike p03", "Flow + pike p04")
}

f2[, treatment_tank := factor(paste(treatment, gen),
   levels = treatment_tank_levels)]
f2[, treatment := factor(treatment,
                              levels = treatment_levels)]

stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
f2[, stream_id := factor(stream_id, levels = stream_id_levels)]
f2[, pred_state := factor(pred_state, levels = pred_state_levels)]

# create sample_name
f2[, sample_name := paste("fish", .I)]
# f2[, sample_name := .I]

How similar are treatment tanks?

size free

Y <- f2[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
row.names(Y) = f2$sample_name
S <- cov(Y)
E <- eigen(S)$vectors
f2[, pc1 := (Y %*% E[,1])[,1]]
f2[, pc2 := (Y %*% E[,2])[,1]]
f2[, pc3 := (Y %*% E[,3])[,1]]

gg1 <- ggplot(data = f2,
       aes(x = pc1,
           y = pc2,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC1 v PC2: size free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  
  NULL

gg2 <- ggplot(data = f2,
       aes(x = pc2,
           y = pc3,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC2 v PC3: size free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +

  NULL

plot_grid(gg1, gg2, nrow = 2)

scatter3d(pc3 ~ pc1 + pc2 | treatment_tank,
          data = f2,
          surface = FALSE)
# unstandardized
dist_mat <- dist(Y, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
# plot(hclust_avg)

# extract dendrogram segment data
dendrogram_data <- dendro_data(hclust_avg)
dendrogram_segments <- dendrogram_data$segments # contains all dendrogram segment data

# get terminal dendrogram segments
dendrogram_ends <- dendrogram_segments %>%
 filter(yend == 0) %>% # filter for terminal dendrogram ends
 left_join(dendrogram_data$labels, by = "x") %>% # .$labels contains the row names from dist_matrix (i.e., sample_name)
 rename(sample_name = label) %>%
 left_join(f2, by = "sample_name") 
# dataframe now contains only terminal dendrogram segments and merged metadata associated with each iris

gg_tree <- ggplot() +
  geom_segment(data = dendrogram_segments,
               aes(x = x,
                   y = y,
                   xend = xend,
                   yend = yend)) +
  geom_segment(data = dendrogram_ends,
               aes(x = x,
                   y = y.x,
                   xend = xend,
                   yend = yend,
                   # text = paste("sample name: ",
                   #              sample_name, "<br>",
                   #              "species: ", Species),
                   color = treatment_tank)) +
  # test aes is for plotly
  scale_color_manual(values = pal_okabe_ito[1:5]) +
  scale_y_reverse() +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none") +
  ylab("Distance") + # flipped x and y coordinates for aesthetic reasons
  ggtitle("Treatment clusters -- size free")

gg_tree

Allometry free

Y_raw <- f2[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
m1 <- lm(Y ~ median_size, data = f2)
Y <- residuals(m1)
row.names(Y) = f2$sample_name
S <- cov(Y)
E <- eigen(S)$vectors
f2[, pc1_allom_free := (Y %*% E[,1])[,1]]
f2[, pc2_allom_free := (Y %*% E[,2])[,1]]
f2[, pc3_allom_free := (Y %*% E[,3])[,1]]

gg1 <- ggplot(data = f2,
       aes(x = pc1_allom_free,
           y = pc2_allom_free,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC1 v PC2: allometry free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  
  NULL

gg2 <- ggplot(data = f2,
       aes(x = pc2,
           y = pc3,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC2 v PC3: allometry free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +

  NULL

gg1

gg2

#plot_grid(gg1, gg2, nrow = 2)
# unstandardized
dist_mat <- dist(Y, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
# plot(hclust_avg)

# extract dendrogram segment data
dendrogram_data <- dendro_data(hclust_avg)
dendrogram_segments <- dendrogram_data$segments # contains all dendrogram segment data

# get terminal dendrogram segments
dendrogram_ends <- dendrogram_segments %>%
 filter(yend == 0) %>% # filter for terminal dendrogram ends
 left_join(dendrogram_data$labels, by = "x") %>% # .$labels contains the row names from dist_matrix (i.e., sample_name)
 rename(sample_name = label) %>%
 left_join(f2, by = "sample_name") 
# dataframe now contains only terminal dendrogram segments and merged metadata associated with each iris

gg_tree <- ggplot() +
  geom_segment(data = dendrogram_segments,
               aes(x = x,
                   y = y,
                   xend = xend,
                   yend = yend)) +
  geom_segment(data = dendrogram_ends,
               aes(x = x,
                   y = y.x,
                   xend = xend,
                   yend = yend,
                   # text = paste("sample name: ",
                   #              sample_name, "<br>",
                   #              "species: ", Species),
                   color = treatment_tank)) +
  # test aes is for plotly
  scale_color_manual(values = pal_okabe_ito[1:5]) +
  scale_y_reverse() +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none") +
  ylab("Distance") + # flipped x and y coordinates for aesthetic reasons
  ggtitle("Treatment clusters -- unstandardized")

gg_tree

Notes

  1. Distinct treatment groups not very conspicuous. Is this largely size + pc1?
gg1 <- ggplot(data = f2,
       aes(x = median_size,
           y = pc1,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC1 v median size: size free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  
  NULL

gg2 <- ggplot(data = f2,
       aes(x = median_size,
           y = pc1_allom_free,
           color = treatment_tank)) +
  geom_point() +
  theme_pubr() +
  ggtitle("PC1 v median size: allometry free") +
  scale_color_manual(values = pal_okabe_ito) +
  theme(legend.title = element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  
  NULL


gg1

gg2

Univariate model fits

pd <- position_dodge(0.4)

gg_univariate <- list()
p <- length(lm_cols_x13)
for(j in 1:p){
  gg <- ggplot(data = f2,
               aes(x = treatment_tank,
                   y = get(lm_cols_x13[j]),
                   color = stream_id)) +
    geom_point(position = pd) +
    ylab(lm_cols_x13[j]) +
    scale_color_manual(values = pal_okabe_ito_blue) +
    NULL
  gg_univariate[[length(gg_univariate)+1]] <- ggplotGrob(gg)
}
 names(gg_univariate) <- lm_cols_x13

plot_grid(gg_univariate[[1]],
          gg_univariate[[2]], nrow = 2)

plot_grid(gg_univariate[[3]],
          gg_univariate[[4]], nrow = 2)

plot_grid(gg_univariate[[5]],
          gg_univariate[[6]], nrow = 2)

plot_grid(gg_univariate[[7]],
          gg_univariate[[8]], nrow = 2)

plot_grid(gg_univariate[[9]],
          gg_univariate[[10]], nrow = 2)

plot_grid(gg_univariate[[11]],
          gg_univariate[[12]], nrow = 2)

plot_grid(gg_univariate[[13]],
          gg_univariate[[14]], nrow = 2)

plot_grid(gg_univariate[[15]],
          gg_univariate[[16]], nrow = 2)

plot_grid(gg_univariate[[17]],
          gg_univariate[[18]], nrow = 2)

plot_grid(gg_univariate[[19]],
          gg_univariate[[20]], nrow = 2)

plot_grid(gg_univariate[[21]],
          gg_univariate[[22]], nrow = 2)

plot_grid(gg_univariate[[23]],
          gg_univariate[[24]], nrow = 2)

plot_grid(gg_univariate[[25]])

Combined flow and pred analysis

  1. contrasts
contrast_matrix <- diag(18)
stream_ids <- paste0(levels(f2$stream_id), "_")
flow_levels <- c("noflow", "flow", "flow+pike")
contrast_labels <- do.call(paste0, expand.grid(stream_ids, flow_levels))

row.names(contrast_matrix) <- contrast_labels
lm_pairs <- list(
  "ardc:flow - no flow" = contrast_matrix["ardc_flow",] -
    contrast_matrix["ardc_noflow",],
  "arin:flow - no flow" = contrast_matrix["arin_flow",] -
    contrast_matrix["arin_noflow",],
  "orop:flow - no flow" = contrast_matrix["orop_flow",] -
    contrast_matrix["orop_noflow",],
  "quar:flow - no flow" = contrast_matrix["quar_flow",] -
    contrast_matrix["quar_noflow",],
  "turu:flow - no flow" = contrast_matrix["turu_flow",] -
    contrast_matrix["turu_noflow",],
  "guan:flow - no flow" = contrast_matrix["guan_flow",] -
    contrast_matrix["guan_noflow",],
  "ardc:pred - no pred" = contrast_matrix["ardc_flow+pike",] -
    contrast_matrix["ardc_flow",],
  "arin:pred - no pred" = contrast_matrix["arin_flow+pike",] -
    contrast_matrix["arin_flow",],
  "orop:pred - no pred" = contrast_matrix["orop_flow+pike",] -
    contrast_matrix["orop_flow",],
  "quar:pred - no pred" = contrast_matrix["quar_flow+pike",] -
    contrast_matrix["quar_flow",],
  "turu:pred - no pred" = contrast_matrix["turu_flow+pike",] -
    contrast_matrix["turu_flow",],
  "guan:pred - no pred" = contrast_matrix["guan_flow+pike",] -
    contrast_matrix["guan_flow",]
)
contrast_matrix <- diag(30)
stream_ids <- paste0(levels(f2$stream_id), "_")
flow_levels <- c("noflow", "flow-p02", "flow-p03", "flow+pike-p01", "flow+pike-p04")
contrast_labels <- do.call(paste0, expand.grid(stream_ids, flow_levels))

row.names(contrast_matrix) <- contrast_labels
lm_pairs <- list(
  "ardc:flow - no flow" = contrast_matrix["ardc_flow",] -
    contrast_matrix["ardc_noflow",],
  "arin:flow - no flow" = contrast_matrix["arin_flow",] -
    contrast_matrix["arin_noflow",],
  "orop:flow - no flow" = contrast_matrix["orop_flow",] -
    contrast_matrix["orop_noflow",],
  "quar:flow - no flow" = contrast_matrix["quar_flow",] -
    contrast_matrix["quar_noflow",],
  "turu:flow - no flow" = contrast_matrix["turu_flow",] -
    contrast_matrix["turu_noflow",],
  "guan:flow - no flow" = contrast_matrix["guan_flow",] -
    contrast_matrix["guan_noflow",],
  "ardc:pred - no pred" = contrast_matrix["ardc_flow+pike",] -
    contrast_matrix["ardc_flow",],
  "arin:pred - no pred" = contrast_matrix["arin_flow+pike",] -
    contrast_matrix["arin_flow",],
  "orop:pred - no pred" = contrast_matrix["orop_flow+pike",] -
    contrast_matrix["orop_flow",],
  "quar:pred - no pred" = contrast_matrix["quar_flow+pike",] -
    contrast_matrix["quar_flow",],
  "turu:pred - no pred" = contrast_matrix["turu_flow+pike",] -
    contrast_matrix["turu_flow",],
  "guan:pred - no pred" = contrast_matrix["guan_flow+pike",] -
    contrast_matrix["guan_flow",]
)
  1. Fit univariate lm models with size as covariate
lm_adj <- list()
lm_adj_emm <- list()
lm_adj_pairs <- list()
for(j in 1:length(lm_cols_x13)){
  # model fit
  form_j <- paste0(lm_cols_x13[j],
                   " ~ stream_id * treatment_tank + median_size") %>%
    formula()
  m1 <- lm(form_j, data = f2)

  # inference
  m1_emm <- emmeans(m1,
                    specs = c("stream_id", "treatment_tank"))
  # m1_pairs <- contrast(m1_emm,
  #                      method = lm_pairs,
  #                      adjust = "none") %>%
  #   summary(infer = TRUE)
  
  # save to list
  lm_adj[[length(lm_adj)+1]] <- m1
  lm_adj_emm[[length(lm_adj_emm)+1]] <- m1_emm
#  lm_adj_pairs[[length(lm_adj_pairs)+1]] <- m1_pairs
}
names(lm_adj) <- lm_cols_x13
names(lm_adj_emm) <- lm_cols_x13
#names(lm_adj_pairs) <- lm_cols_x13
  1. Plot univariate models
lm_gg <- list()
for(j in 1:length(lm_cols_x13)){
  m1 <- lm_adj[[j]]
  m1_emm <- lm_adj_emm[[j]] %>%
    summary()
#  m1_pairs <- lm_adj_pairs[[j]]
  pd <- position_dodge(0.2)
  gg <- ggplot(data = m1_emm,
               aes(x = stream_id,
                   y = emmean,
                   color = treatment_tank)) +
    geom_point(position = pd) +
    theme_pubr() +
    ggtitle(lm_cols_x13[j]) +
    scale_color_manual(values = pal_okabe_ito) +
    theme(legend.title = element_blank()) +
    guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
    NULL
  lm_gg[[length(lm_gg)+1]] <- gg
}
names(lm_gg) <- lm_cols_x13

plot_grid(lm_gg[[1]],
          lm_gg[[2]], ncol = 2)

plot_grid(lm_gg[[3]],
          lm_gg[[4]], ncol = 2)

plot_grid(lm_gg[[5]],
          lm_gg[[6]], ncol = 2)

plot_grid(lm_gg[[7]],
          lm_gg[[8]], ncol = 2)

plot_grid(lm_gg[[9]],
          lm_gg[[10]], ncol = 2)

plot_grid(lm_gg[[11]],
          lm_gg[[12]], ncol = 2)

plot_grid(lm_gg[[13]],
          lm_gg[[14]], ncol = 2)

plot_grid(lm_gg[[15]],
          lm_gg[[16]], ncol = 2)

plot_grid(lm_gg[[17]],
          lm_gg[[18]], ncol = 2)

plot_grid(lm_gg[[19]],
          lm_gg[[20]], ncol = 2)

plot_grid(lm_gg[[21]],
          lm_gg[[22]], ncol = 2)

plot_grid(lm_gg[[23]],
          lm_gg[[24]], ncol = 2)

plot_grid(lm_gg[[25]])

Notes

  1. This figure is for supplement.
  2. In general, the results are similar to the shape-only results – there is very consistent pattern of flow treatment effect among streams. Where there is inconsistent direction, the magnitude of differences is small relative to within stream x treatment variation.
flow_gg <- list()
p <- length(lm_cols_x13)
for(j in 1:p){
  m1 <- lm_adj[[j]]
  gg <- ggplot(data = f2,
               aes(x = median_size,
                   y = get(lm_cols_x13[j]),
                   color = stream_id)) +
    geom_point() +
    geom_ancova_grouped(m1, group_by = "treatment_tank") +
    ylab(lm_cols_x13[j]) +
    scale_color_manual(values = pal_okabe_ito_blue) +
    NULL
  flow_gg[[length(flow_gg)+1]] <- ggplotGrob(gg)
}
names(flow_gg) <- lm_cols_x13

plot_grid(flow_gg[[1]],
          flow_gg[[2]], nrow = 2)

plot_grid(flow_gg[[3]],
          flow_gg[[4]], nrow = 2)

plot_grid(flow_gg[[5]],
          flow_gg[[6]], nrow = 2)

plot_grid(flow_gg[[7]],
          flow_gg[[8]], nrow = 2)

plot_grid(flow_gg[[9]],
          flow_gg[[10]], nrow = 2)

plot_grid(flow_gg[[11]],
          flow_gg[[12]], nrow = 2)

plot_grid(flow_gg[[13]],
          flow_gg[[14]], nrow = 2)

plot_grid(flow_gg[[15]],
          flow_gg[[16]], nrow = 2)

plot_grid(flow_gg[[17]],
          flow_gg[[18]], nrow = 2)

plot_grid(flow_gg[[19]],
          flow_gg[[20]], nrow = 2)

plot_grid(flow_gg[[21]],
          flow_gg[[22]], nrow = 2)

plot_grid(flow_gg[[23]],
          flow_gg[[24]], nrow = 2)

plot_grid(flow_gg[[25]])

What is the effect of flow on body shape?

  1. create subset excluding “Flow + pike”
flow_levels <- c("No Flow", "Flow")
flow_f2 <- shape[treatment %in% flow_levels &
                  gen != "f0"]
flow_f2[, flow := factor(treatment,
                              levels = flow_levels)]

stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
flow_f2[, stream_id := factor(stream_id, levels = stream_id_levels)]
flow_f2[, pred_state := factor(pred_state, levels = pred_state_levels)]
  1. Create contrasts matrix
contrast_matrix <- diag(12)
stream_ids <- paste0(levels(flow_f2$stream_id), "_")
flow_levels <- c("noflow", "flow" )
contrast_labels <- do.call(paste0, expand.grid(stream_ids, flow_levels))

row.names(contrast_matrix) <- contrast_labels
flow_pairs <- list(
  "ardc:flow - no flow" = contrast_matrix["ardc_flow",] -
    contrast_matrix["ardc_noflow",],
  "arin:flow - no flow" = contrast_matrix["arin_flow",] -
    contrast_matrix["arin_noflow",],
  "orop:flow - no flow" = contrast_matrix["orop_flow",] -
    contrast_matrix["orop_noflow",],
  "quar:flow - no flow" = contrast_matrix["quar_flow",] -
    contrast_matrix["quar_noflow",],
  "turu:flow - no flow" = contrast_matrix["turu_flow",] -
    contrast_matrix["turu_noflow",],
  "guan:flow - no flow" = contrast_matrix["guan_flow",] -
    contrast_matrix["guan_noflow",]
)

Univariate fit and plots

Shape only

  1. Model fits to each coordinate
flow_lm <- list()
flow_lm_emm <- list()
flow_lm_pairs <- list()
for(j in 1:length(lm_cols_x13)){
  # model fit
  form_j <- paste0(lm_cols_x13[j],
                   " ~ stream_id * flow") %>%
    formula()
  m1 <- lm(form_j, data = flow_f2)

  # inference

  m1_emm <- emmeans(m1,
                    specs = c("stream_id", "flow"))
  m1_pairs <- contrast(m1_emm,
                       method = flow_pairs,
                       adjust = "none") %>%
    summary(infer = TRUE)
  
  # save to list
  flow_lm[[length(flow_lm)+1]] <- m1
  flow_lm_emm[[length(flow_lm_emm)+1]] <- m1_emm
  flow_lm_pairs[[length(flow_lm_pairs)+1]] <- m1_pairs
}
names(flow_lm) <- lm_cols_x13
names(flow_lm_emm) <- lm_cols_x13
names(flow_lm_pairs) <- lm_cols_x13
  1. Plot univariate models
flow_gg <- list()
for(j in 1:length(lm_cols_x13)){
  m1 <- flow_lm[[j]]
  m1_emm <- flow_lm_emm[[j]]
  m1_pairs <- flow_lm_pairs[[j]]
  gg <- ggplot_the_response(m1, m1_emm, m1_pairs,
                            y_label = lm_cols_x13[j])
  flow_gg[[length(flow_gg)+1]] <- gg
}
names(flow_gg) <- lm_cols_x13

plot_grid(flow_gg[[1]],
          flow_gg[[2]], ncol = 2)

plot_grid(flow_gg[[3]],
          flow_gg[[4]], ncol = 2)

plot_grid(flow_gg[[5]],
          flow_gg[[6]], ncol = 2)

plot_grid(flow_gg[[7]],
          flow_gg[[8]], ncol = 2)

plot_grid(flow_gg[[9]],
          flow_gg[[10]], ncol = 2)

plot_grid(flow_gg[[11]],
          flow_gg[[12]], ncol = 2)

plot_grid(flow_gg[[13]],
          flow_gg[[14]], ncol = 2)

plot_grid(flow_gg[[15]],
          flow_gg[[16]], ncol = 2)

plot_grid(flow_gg[[17]],
          flow_gg[[18]], ncol = 2)

plot_grid(flow_gg[[19]],
          flow_gg[[20]], ncol = 2)

plot_grid(flow_gg[[21]],
          flow_gg[[22]], ncol = 2)

plot_grid(flow_gg[[23]],
          flow_gg[[24]], ncol = 2)

plot_grid(flow_gg[[25]])

Notes

  1. This figure is for supplement.
  2. In general, there is very consistent pattern of flow treatment effect among streams. Where there is inconsistent direction, the magnitude of differences is small relative to within stream x treatment variation.

Shape adjusted for size (ANCOVA)

  1. Fit univariate lm models with size as covariate
flow_lm_adj <- list()
flow_lm_adj_emm <- list()
flow_lm_adj_pairs <- list()
for(j in 1:length(lm_cols_x13)){
  # model fit
  form_j <- paste0(lm_cols_x13[j],
                   " ~ stream_id * flow + median_size") %>%
    formula()
  m1 <- lm(form_j, data = flow_f2)

  # inference

  m1_emm <- emmeans(m1,
                    specs = c("stream_id", "flow"))
  m1_pairs <- contrast(m1_emm,
                       method = flow_pairs,
                       adjust = "none") %>%
    summary(infer = TRUE)
  
  # save to list
  flow_lm_adj[[length(flow_lm_adj)+1]] <- m1
  flow_lm_adj_emm[[length(flow_lm_adj_emm)+1]] <- m1_emm
  flow_lm_adj_pairs[[length(flow_lm_adj_pairs)+1]] <- m1_pairs
}
names(flow_lm_adj) <- lm_cols_x13
names(flow_lm_adj_emm) <- lm_cols_x13
names(flow_lm_adj_pairs) <- lm_cols_x13
  1. Plot univariate models
flow_gg <- list()
for(j in 1:length(lm_cols_x13)){
  m1 <- flow_lm_adj[[j]]
  m1_emm <- flow_lm_adj_emm[[j]]
  m1_pairs <- flow_lm_adj_pairs[[j]]
  gg <- ggplot_the_response(m1, m1_emm, m1_pairs,
                            y_label = lm_cols_x13[j])
  flow_gg[[length(flow_gg)+1]] <- gg
}
names(flow_gg) <- lm_cols_x13

plot_grid(flow_gg[[1]],
          flow_gg[[2]], ncol = 2)

plot_grid(flow_gg[[3]],
          flow_gg[[4]], ncol = 2)

plot_grid(flow_gg[[5]],
          flow_gg[[6]], ncol = 2)

plot_grid(flow_gg[[7]],
          flow_gg[[8]], ncol = 2)

plot_grid(flow_gg[[9]],
          flow_gg[[10]], ncol = 2)

plot_grid(flow_gg[[11]],
          flow_gg[[12]], ncol = 2)

plot_grid(flow_gg[[13]],
          flow_gg[[14]], ncol = 2)

plot_grid(flow_gg[[15]],
          flow_gg[[16]], ncol = 2)

plot_grid(flow_gg[[17]],
          flow_gg[[18]], ncol = 2)

plot_grid(flow_gg[[19]],
          flow_gg[[20]], ncol = 2)

plot_grid(flow_gg[[21]],
          flow_gg[[22]], ncol = 2)

plot_grid(flow_gg[[23]],
          flow_gg[[24]], ncol = 2)

plot_grid(flow_gg[[25]])

Notes

  1. This figure is for supplement.
  2. In general, the results are similar to the shape-only results – there is very consistent pattern of flow treatment effect among streams. Where there is inconsistent direction, the magnitude of differences is small relative to within stream x treatment variation.
flow_gg <- list()
p <- length(lm_cols_x13)
for(j in 1:p){
  m1 <- flow_lm_adj[[j]]
  gg <- ggplot(data = flow_f2,
               aes(x = median_size,
                   y = get(lm_cols_x13[j]),
                   color = stream_id)) +
    geom_point() +
    geom_ancova_grouped(m1, group_by = "flow") +
    ylab(lm_cols_x13[j]) +
    scale_color_manual(values = pal_okabe_ito_blue) +
    NULL
  flow_gg[[length(flow_gg)+1]] <- ggplotGrob(gg)
}
names(flow_gg) <- lm_cols_x13

plot_grid(flow_gg[[1]],
          flow_gg[[2]], nrow = 2)

plot_grid(flow_gg[[3]],
          flow_gg[[4]], nrow = 2)

plot_grid(flow_gg[[5]],
          flow_gg[[6]], nrow = 2)

plot_grid(flow_gg[[7]],
          flow_gg[[8]], nrow = 2)

plot_grid(flow_gg[[9]],
          flow_gg[[10]], nrow = 2)

plot_grid(flow_gg[[11]],
          flow_gg[[12]], nrow = 2)

plot_grid(flow_gg[[13]],
          flow_gg[[14]], nrow = 2)

plot_grid(flow_gg[[15]],
          flow_gg[[16]], nrow = 2)

plot_grid(flow_gg[[17]],
          flow_gg[[18]], nrow = 2)

plot_grid(flow_gg[[19]],
          flow_gg[[20]], nrow = 2)

plot_grid(flow_gg[[21]],
          flow_gg[[22]], nrow = 2)

plot_grid(flow_gg[[23]],
          flow_gg[[24]], nrow = 2)

plot_grid(flow_gg[[25]])

Notes

  1. This figure is for supplement.
  2. In general, there is a strong allometry effect (size covariate) at each coordinate. the results are similar to the shape-only results – there is very consistent pattern of flow treatment effect among streams. Where there is inconsistent direction, the magnitude of differences is small relative to within stream x treatment variation.

Plot of effect of flow on body shape

Shape only

  1. Group means
flow_group_means <- data.table()
for(j in 1:length(lm_cols_x13)){
  m1_emm <- flow_lm_emm[[j]] %>%
    summary()
  flow_group_means <- rbind(
    flow_group_means,
    data.table(coord = lm_cols_x13[j],
               m1_emm)
  )
}
flow_group_means[, coord := factor(coord, levels = lm_cols_x13)]

flow_group_means[, axis := substr(coord, 1, 1)]
flow_group_means[, lm := substr(coord, 2, nchar(as.character(coord)))]
flow_group_means[, lm := factor(lm,
                              levels = as.character(1:13))]

# cast to wide with x and y cols for each landmark
flow_group_means_wide <- dcast(flow_group_means,
                   flow + stream_id + lm ~ axis,
                   value.var = "emmean")

flow_group_means_wide[lm == "13", y := 0]

flow_treatment_means_wide <- flow_group_means_wide[, .(
  x = mean(x),
  y = mean(y)),
  by = .(flow, lm)]

# replace eye with lm1 in treatment means table
flow_treatment_means_wide[lm == "12",
                              x := flow_treatment_means_wide[lm == "1", x]]
flow_treatment_means_wide[lm == "12",
                              y := flow_treatment_means_wide[lm == "1", y]]
  1. Plot effect of flow
gg1 <- ggplot(data = flow_group_means_wide,
              aes(x = x,
                  y = y,
                  color = flow)) +
  geom_point(size = 1) +
  geom_path(data = flow_treatment_means_wide[lm != "13"],
             aes(color = flow)) +
  coord_fixed() +
  theme_pubr() +
  scale_color_manual(values = pal_okabe_ito_blue) +
  NULL
gg1
Figure 1. Model mean shape coordinates

Figure 1. Model mean shape coordinates

fig_cap <- "Model mean shape coordinates"
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}
flow_grandmean <- flow_group_means_wide[, .(x = mean(x),
                                            y = mean(y)),
                                        by = .(lm, flow)]
tps_cols <- c("x", "y")
x <- flow_grandmean[flow == "Flow", .SD, .SDcols = tps_cols] %>%
  as.matrix
y <- flow_grandmean[flow == "No Flow", .SD, .SDcols = tps_cols] %>%
  as.matrix
tps(x, y,
    gridlines = 50,
    multiple = 2)

Shape adjusted for size

  1. Size adjusted group means
flow_group_means_adj <- data.table()
for(j in 1:length(lm_cols_x13)){
  m1_emm <- flow_lm_adj_emm[[j]] %>%
    summary()
  flow_group_means_adj <- rbind(
    flow_group_means_adj,
    data.table(coord = lm_cols_x13[j],
               m1_emm)
  )
}
flow_group_means_adj[, coord := factor(coord, levels = lm_cols_x13)]

flow_group_means_adj[, axis := substr(coord, 1, 1)]
flow_group_means_adj[, lm := substr(coord, 2, nchar(as.character(coord)))]
flow_group_means_adj[, lm := factor(lm,
                              levels = as.character(1:13))]

# cast to wide with x and y cols for each landmark
flow_group_means_wide_adj <- dcast(flow_group_means_adj,
                   flow + stream_id + lm ~ axis,
                   value.var = "emmean")

flow_group_means_wide_adj[lm == "13", y := 0]

flow_treatment_means_wide_adj <- flow_group_means_wide_adj[, .(
  x = mean(x),
  y = mean(y)),
  by = .(flow, lm)]

# replace eye with lm1 in treatment means table
flow_treatment_means_wide_adj[lm == "12",
                              x := flow_treatment_means_wide_adj[lm == "1", x]]
flow_treatment_means_wide_adj[lm == "12",
                              y := flow_treatment_means_wide_adj[lm == "1", y]]
  1. Plot size adjusted effect of flow
gg2 <- ggplot(data = flow_group_means_wide_adj,
              aes(x = x,
                  y = y,
                  color = flow)) +
  geom_point(size = 1) +
  geom_path(data = flow_treatment_means_wide_adj[lm != "13"],
             aes(color = flow)) +
  coord_fixed() +
  theme_pubr() +
  scale_color_manual(values = pal_okabe_ito_blue) +
  NULL
gg2
Figure 2. Model mean shape coordinates

Figure 2. Model mean shape coordinates

fig_cap <- "Model mean shape coordinates"
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}

Combined plots

gg <- plot_grid(gg1, gg2, nrow = 2)
gg
Figure 3. A. Model mean shape coordinates for No Flow and Flow treatments. B. Size-adjusted mean shape coordinates for No Flow and Flow treatments using median size as the covariate.

Figure 3. A. Model mean shape coordinates for No Flow and Flow treatments. B. Size-adjusted mean shape coordinates for No Flow and Flow treatments using median size as the covariate.

fig_cap <- "A. Model mean shape coordinates for No Flow and Flow treatments. B. Size-adjusted mean shape coordinates for No Flow and Flow treatments using median size as the covariate."
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}

Flow scores

Shape only

  1. compute flow scores as vector of difference flow - no_flow
# get treament means as means of group means
flow_treatment_means <- flow_group_means[, .(emmean = mean(emmean)),
                                         by = .(coord, flow)]

flow_loadings <- dcast(flow_treatment_means,
                         coord ~ flow,
                         value.var = "emmean"
                         )
colnames(flow_loadings) <- c("lm", "flow", "no_flow")
# d is the raw difference
flow_loadings[,  d := flow - no_flow]
# e is d scaled by length of d. This is like an eigenvector
flow_loadings[, e := d/sqrt(c(t(d) %*% d))]

# note - high flow fish have more negative scores so reverse direction
# so that high scores = high flow
flow_loadings[, e := -e]

e <- c(flow_loadings[, e])
t(e) %*% e # check to see if equal to one
##      [,1]
## [1,]    1
# n x 2p matrix of coordinates
X <- flow_f2[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
flow_f2[, flow_score := (X %*% e)[,1]]
flow_f2[, flow_score := flow_score - mean(flow_score)]

# n_stream_id x 2p matrix of coordinates + flow_score
flow_f2_group_means_wide <- flow_f2[, lapply(.SD, mean),
                                    by = .(stream_id, treatment),
                                    .SDcols = c(lm_cols_x13, "flow_score") ] 
  1. Variance of flow score relative to total variance
total_var <- sum(diag(cov(flow_f2[, .SD, .SDcols = lm_cols_x13])))
flow_var <- sd(flow_f2[, flow_score])^2
flow_variance_frac <- flow_var/total_var
flow_variance_frac
## [1] 0.3335456
  1. Influence of each coordinate on flow score. Correlation between coordinates of stream x treatment mean and flow_score.
ycols <- c("flow_score", lm_cols_x13)
r <- cor(flow_f2_group_means_wide[, .SD, .SDcols = ycols])[-1,1]
flow_loadings[, r := r]
flow_loadings %>%
  kable (digits = c(1,4,4,4,2,2)) %>%
  kable_styling(full_width = FALSE)
lm flow no_flow d e r
x1 0.0319 -0.0193 0.0512 -0.42 -0.91
y1 -0.0216 -0.0229 0.0014 -0.01 -0.08
x2 0.5133 0.5655 -0.0522 0.43 0.93
y2 0.2032 0.1924 0.0108 -0.09 -0.32
x3 1.4975 1.4831 0.0144 -0.12 -0.68
y3 0.2773 0.2628 0.0146 -0.12 -0.45
x4 1.6952 1.7680 -0.0728 0.60 0.97
y4 0.2224 0.2054 0.0170 -0.14 -0.63
x5 2.3938 2.3823 0.0116 -0.10 -0.59
y5 0.1789 0.1804 -0.0015 0.01 0.24
x6 2.4680 2.4733 -0.0053 0.04 -0.16
y6 -0.0126 0.0019 -0.0144 0.12 0.83
x7 2.3508 2.3522 -0.0015 0.01 -0.23
y7 -0.1993 -0.1973 -0.0020 0.02 0.18
x8 1.6811 1.6947 -0.0137 0.11 0.79
y8 -0.2379 -0.2293 -0.0085 0.07 0.26
x9 1.4760 1.4610 0.0150 -0.12 -0.72
y9 -0.3130 -0.3130 0.0000 0.00 -0.17
x10 1.1323 1.1466 -0.0143 0.12 0.71
y10 -0.3753 -0.3655 -0.0098 0.08 0.30
x11 0.4800 0.4813 -0.0013 0.01 0.11
y11 -0.2771 -0.2817 0.0046 -0.04 -0.58
x12 0.2281 0.2256 0.0025 -0.02 -0.17
y12 -0.0259 -0.0082 -0.0177 0.15 0.80
x13 3.1933 3.2333 -0.0400 0.33 0.81
  1. Fit a model to the flow scores
flow_score_lm1 <- lm(flow_score ~ stream_id * flow,
                      data = flow_f2)
ggcheck_the_model(flow_score_lm1) # increased variance in high flow groups

flow_score_lm1_emm <- emmeans(flow_score_lm1,
                              specs = c("stream_id", "flow"))

flow_score_lm1_pairs <- contrast(flow_score_lm1_emm,
                                 method = flow_pairs,
                                 adjust = "none"
                                 )

Shape adjusted for size

  1. compute flow scores as vector of difference flow - no_flow
# get treament means as means of group means
flow_treatment_means_adj <- flow_group_means_adj[, .(emmean = mean(emmean)),
                                         by = .(coord, flow)]

flow_loadings_adj <- dcast(flow_treatment_means_adj,
                         coord ~ flow,
                         value.var = "emmean"
                         )
colnames(flow_loadings_adj) <- c("lm", "flow", "no_flow")
# d is the raw difference
flow_loadings_adj[,  d := flow - no_flow]
# e is d scaled by length of d. This is like an eigenvector
flow_loadings_adj[, e := d/sqrt(c(t(d) %*% d))]

# note - high flow fish have more negative scores so reverse direction
# so that high scores = high flow
flow_loadings_adj[, e := -e]

e <- c(flow_loadings_adj[, e])
t(e) %*% e # check to see if equal to one
##      [,1]
## [1,]    1
# n x 2p matrix of coordinates
X <- flow_f2[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
flow_f2[, flow_score_adj := (X %*% e)[,1]]
flow_f2[, flow_score_adj := flow_score_adj - mean(flow_score_adj)]

# n_stream_id x 2p matrix of coordinates + flow_score
flow_f2_group_means_wide_adj <- flow_f2[, lapply(.SD, mean),
                                    by = .(stream_id, treatment),
                                    .SDcols = c(lm_cols_x13, "flow_score_adj") ] 
  1. Variance of flow score relative to total variance
total_var <- sum(diag(cov(flow_f2[, .SD, .SDcols = lm_cols_x13])))

flow_var <- sd(flow_f2[, flow_score_adj])^2
flow_variance_frac <- flow_var/total_var
flow_variance_frac
## [1] 0.2802826
  1. Influence of each coordinate on flow score. Correlation between coordinates of stream x treatment mean and flow_score.
ycols <- c("flow_score_adj", lm_cols_x13)
r <- cor(flow_f2_group_means_wide_adj[, .SD, .SDcols = ycols])[-1,1]
flow_loadings_adj[, r := r]
flow_loadings_adj %>%
  kable (digits = c(1,4,4,4,2,2)) %>%
  kable_styling(full_width = FALSE)
lm flow no_flow d e r
x1 0.0295 -0.0144 0.0439 -0.44 -0.93
y1 -0.0219 -0.0222 0.0003 0.00 -0.09
x2 0.5172 0.5578 -0.0407 0.41 0.93
y2 0.2037 0.1913 0.0124 -0.12 -0.33
x3 1.4973 1.4837 0.0136 -0.14 -0.69
y3 0.2758 0.2658 0.0100 -0.10 -0.46
x4 1.6990 1.7604 -0.0614 0.62 0.97
y4 0.2208 0.2087 0.0121 -0.12 -0.64
x5 2.3938 2.3823 0.0116 -0.12 -0.56
y5 0.1799 0.1785 0.0014 -0.01 0.20
x6 2.4676 2.4739 -0.0063 0.06 -0.12
y6 -0.0109 -0.0014 -0.0095 0.10 0.82
x7 2.3504 2.3529 -0.0024 0.02 -0.21
y7 -0.1982 -0.1996 0.0014 -0.01 0.16
x8 1.6801 1.6967 -0.0166 0.17 0.78
y8 -0.2374 -0.2303 -0.0071 0.07 0.30
x9 1.4744 1.4643 0.0101 -0.10 -0.74
y9 -0.3126 -0.3138 0.0012 -0.01 -0.14
x10 1.1287 1.1538 -0.0251 0.25 0.74
y10 -0.3756 -0.3650 -0.0106 0.11 0.32
x11 0.4811 0.4790 0.0021 -0.02 0.08
y11 -0.2782 -0.2796 0.0014 -0.01 -0.57
x12 0.2276 0.2268 0.0008 -0.01 -0.17
y12 -0.0262 -0.0076 -0.0186 0.19 0.80
x13 3.2022 3.2155 -0.0133 0.13 0.77
  1. Fit a model to the flow scores
flow_score_adj_lm1 <- lm(flow_score_adj ~ stream_id * flow,
                      data = flow_f2)
ggcheck_the_model(flow_score_adj_lm1) # increased variance in high flow groups

flow_score_adj_lm1_emm <- emmeans(flow_score_adj_lm1,
                              specs = c("stream_id", "flow"))

flow_score_adj_lm1_pairs <- contrast(flow_score_adj_lm1_emm,
                                 method = flow_pairs,
                                 adjust = "none"
                                 )

Combined plot

Plot flow score by stream_id:treatment combinations

gg1 <- ggplot_the_response(flow_score_lm1, flow_score_lm1_emm, flow_score_lm1_pairs,
                    y_label = "Flow Score")
gg2 <- ggplot_the_response(flow_score_adj_lm1,
                           flow_score_adj_lm1_emm,
                           flow_score_adj_lm1_pairs,
                    y_label = "Flow Score")
plot_grid(gg1, gg2, ncol = 2)
Figure 4. Flow scores

Figure 4. Flow scores

fig_cap <- "Flow scores"
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}

Shape vs. size-adjusted shape scores

qplot(x = flow_loadings[, e], y = flow_loadings_adj[, e])

Static allometry of flow score

  1. treatment effect on size
lm1 <- lm(median_size ~ flow * stream_id,
         data = flow_f2)
lm2 <- lm(median_size ~ flow + stream_id,
         data = flow_f2)

coef(summary(lm2)) %>%
  kable(digits = c(3,3,2,5)) %>%
  kable_styling()
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.986 0.015 66.39 0.00000
flowFlow -0.083 0.012 -7.21 0.00000
stream_idarin -0.012 0.019 -0.62 0.53498
stream_idorop 0.036 0.020 1.81 0.07172
stream_idquar 0.018 0.020 0.90 0.36701
stream_idturu 0.052 0.020 2.61 0.00989
stream_idguan -0.124 0.021 -6.00 0.00000
  1. Standard ANCOVA model. Problem: median size is a post-treatment measure and the estimate of the treatment effect is biased.
lmm_4 <- lmer(flow_score ~ flow + median_size +
               (1|stream_id) +
               (1|stream_id:flow),
         data = flow_f2)

lmm_4_emm <- emmeans(lmm_4, specs = "flow")

coef(summary(lmm_4)) %>%
  kable(digits = c(3,3,1,1,8)) %>%
  kable_styling()
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.243 0.039 128.3 6.2 1.000e-08
flowFlow 0.098 0.010 6.2 9.7 5.801e-05
median_size -0.294 0.039 156.0 -7.6 0.000e+00
m1 <- lm(flow_score ~ flow*stream*median_size, data = flow_f2)

gg <- ggplot(data = flow_f2,
             aes(x = median_size,
                 y = flow_score,
                 color = stream_id,
                 shape = flow)) +
  geom_ancova_grouped(m1, group_by = "flow") +
  geom_point(size = 2) +
  scale_shape_manual(values=c(16, 1)) +
  scale_color_manual(values = pal_okabe_ito_blue) +
  theme_pubr() +
  NULL

gg

m2 <- lm(flow_score ~ flow*stream + median_size, data = flow_f2)

gg <- ggplot(data = flow_f2,
             aes(x = median_size,
                 y = flow_score,
                 color = stream_id,
                 shape = flow)) +
  geom_ancova_grouped(m2, group_by = "flow") +
  geom_point(size = 2) +
  scale_shape_manual(values=c(16, 1)) +
  scale_color_manual(values = pal_okabe_ito_blue) +
  theme_pubr() +
  NULL

gg

Stream vs. flow environment variances

type3 <- list(flow = contr.sum,
              stream = contr.sum)

m1 <- lm(flow_score ~ flow*stream*median_size,
         contrasts = type3,
         data = flow_f2)
Anova(m1, type = "3")
## Anova Table (Type III tests)
## 
## Response: flow_score
##                           Sum Sq  Df F value    Pr(>F)    
## (Intercept)             0.045903   1 42.9338 8.804e-10 ***
## flow                    0.026912   1 25.1715 1.484e-06 ***
## stream                  0.009143   5  1.7104 0.1356420    
## median_size             0.039710   1 37.1410 9.083e-09 ***
## flow:stream             0.013337   5  2.4949 0.0334886 *  
## flow:median_size        0.014654   1 13.7056 0.0003011 ***
## stream:median_size      0.009303   5  1.7403 0.1288680    
## flow:stream:median_size 0.011935   5  2.2325 0.0540273 .  
## Residuals               0.158236 148                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- lm(flow_score ~ flow*stream + median_size,
         contrasts = type3,
         data = flow_f2)
Anova(m2, type = "3")
## Anova Table (Type III tests)
## 
## Response: flow_score
##               Sum Sq  Df  F value    Pr(>F)    
## (Intercept) 0.074455   1  58.1383 2.090e-12 ***
## flow        0.248894   1 194.3489 < 2.2e-16 ***
## stream      0.060878   5   9.5073 5.967e-08 ***
## median_size 0.067409   1  52.6367 1.665e-11 ***
## flow:stream 0.018294   5   2.8570   0.01688 *  
## Residuals   0.203624 159                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 <- lm(flow_score ~ flow + stream + median_size,
         contrasts = type3,
         data = flow_f2)
Anova(m3, type = "3")
## Anova Table (Type III tests)
## 
## Response: flow_score
##               Sum Sq  Df  F value    Pr(>F)    
## (Intercept) 0.090431   1  66.8294 7.658e-14 ***
## flow        0.272256   1 201.2002 < 2.2e-16 ***
## stream      0.051990   5   7.6843 1.628e-06 ***
## median_size 0.081475   1  60.2109 8.627e-13 ***
## Residuals   0.221918 164                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Genetics vs. Flow variances

flow_levels <- c("No Flow", "Flow")
flow_f0 <- shape[treatment %in% flow_levels] # includes f2
flow_f0[, flow := factor(treatment,
                              levels = flow_levels)]

stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
flow_f0[, stream_id := factor(stream_id, levels = stream_id_levels)]
flow_f0[, pred_state := factor(pred_state, levels = pred_state_levels)]

# add f0 to flow treatment
flow_f0[gen == "f0", flow := "F0"]
flow_f0[, flow := factor(flow, levels = c("F0", "No Flow", "Flow"))]

# flow score
e <- c(flow_loadings[, e])
# n x 2p matrix of coordinates
X <- flow_f0[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
flow_f0[, flow_score := (X %*% e)[,1]]
flow_f0[, flow_score := flow_score - mean(flow_score)]

# flow score adjusted for median size
e <- c(flow_loadings_adj[, e])
# n x 2p matrix of coordinates
X <- flow_f0[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
flow_f0[, flow_score_adj := (X %*% e)[,1]]
flow_f0[, flow_score_adj := flow_score_adj - mean(flow_score_adj)]
# View(flow_f0[, .SD, .SDcols = c("dataset", "stream_id", "gen", "flow", "treatment")])
  1. Contrasts matrix
contrast_matrix <- diag(18)
stream_ids <- paste0(levels(flow_f2$stream_id), "_")
flow_levels <- c("f0", "noflow", "flow" )
contrast_labels <- do.call(paste0, expand.grid(stream_ids, flow_levels))

row.names(contrast_matrix) <- contrast_labels
flow_pairs <- list(
  "ardc:flow - no flow" = contrast_matrix["ardc_flow",] -
    contrast_matrix["ardc_noflow",],
  "arin:flow - no flow" = contrast_matrix["arin_flow",] -
    contrast_matrix["arin_noflow",],
  "orop:flow - no flow" = contrast_matrix["orop_flow",] -
    contrast_matrix["orop_noflow",],
  "quar:flow - no flow" = contrast_matrix["quar_flow",] -
    contrast_matrix["quar_noflow",],
  "turu:flow - no flow" = contrast_matrix["turu_flow",] -
    contrast_matrix["turu_noflow",],
  "guan:flow - no flow" = contrast_matrix["guan_flow",] -
    contrast_matrix["guan_noflow",]
)
  1. Fit a model to the flow scores
flow_score_f0_lm1 <- lm(flow_score_adj ~ stream_id * flow,
                      data = flow_f0)

flow_score_f0_lm1_emm <- emmeans(flow_score_f0_lm1,
                              specs = c("stream_id", "flow"))

flow_score_f0_lm1_pairs <- contrast(flow_score_f0_lm1_emm,
                                 method = flow_pairs,
                                 adjust = "none"
                                 )
gg3 <- ggplot_the_response(flow_score_f0_lm1, flow_score_f0_lm1_emm, flow_score_f0_lm1_pairs,
                    y_label = "Flow Score")
plot_grid(gg3)
Figure 5. Flow scores

Figure 5. Flow scores

fig_cap <- "Flow scores"
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}
type3 <- list(stream = contr.sum,
              generation = contr.sum)

m1 <- lm(flow_score ~ flow*stream*median_size,
         contrasts = type3,
         data = flow_f0)
Anova(m1, type = "3")
## Anova Table (Type III tests)
## 
## Response: flow_score
##                           Sum Sq  Df F value    Pr(>F)    
## (Intercept)             0.011603   1 16.0516 8.085e-05 ***
## flow                    0.037496   2 25.9355 5.540e-11 ***
## stream                  0.001554   5  0.4299   0.82758    
## median_size             0.016679   1 23.0733 2.657e-06 ***
## flow:stream             0.014424  10  1.9953   0.03423 *  
## flow:median_size        0.019159   2 13.2520 3.339e-06 ***
## stream:median_size      0.000923   5  0.2553   0.93691    
## flow:stream:median_size 0.013097  10  1.8118   0.05885 .  
## Residuals               0.185057 256                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- lm(flow_score ~ flow*stream + median_size,
         contrasts = type3,
         data = flow_f0)
Anova(m2, type = "3")
## Anova Table (Type III tests)
## 
## Response: flow_score
##              Sum Sq  Df  F value    Pr(>F)    
## (Intercept) 0.06829   1  77.8004 < 2.2e-16 ***
## flow        0.36604   2 208.5065 < 2.2e-16 ***
## stream      0.01813   5   4.1303  0.001241 ** 
## median_size 0.08222   1  93.6652 < 2.2e-16 ***
## flow:stream 0.03568  10   4.0646 3.185e-05 ***
## Residuals   0.23963 273                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 <- lm(flow_score ~ flow + stream + median_size,
         contrasts = type3,
         data = flow_f0)
Anova(m3, type = "3")
## Anova Table (Type III tests)
## 
## Response: flow_score
##              Sum Sq  Df F value    Pr(>F)    
## (Intercept) 0.09544   1  98.106 < 2.2e-16 ***
## flow        0.48481   2 249.183 < 2.2e-16 ***
## stream      0.04900   5  10.073 6.866e-09 ***
## median_size 0.12074   1 124.112 < 2.2e-16 ***
## Residuals   0.27530 283                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

What is the effect of predators on body shape?

  1. create subset excluding “No Flow”. Note this is effect of predators in a flow and there is likely an interaction between predators and flow but we cannot estimate that.
pred_levels <- c("Flow", "Flow + pike")
pred_f2 <- shape[treatment %in% pred_levels]
pred_f2[, pred := factor(treatment,
                              levels = pred_levels)]

stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
pred_f2[, stream_id := factor(stream_id, levels = stream_id_levels)]
pred_f2[, pred_state := factor(pred_state, levels = pred_state_levels)]
  1. Fit univariate lm models
contrast_matrix <- diag(12)
stream_ids <- paste0(levels(pred_f2$stream_id), "_")
pred_levels <- c("nopred", "pred")
contrast_labels <- do.call(paste0, expand.grid(stream_ids, pred_levels))
row.names(contrast_matrix) <- contrast_labels

pred_pairs <- list(
  "ardc:pred - no pred" = contrast_matrix["ardc_pred",] -
    contrast_matrix["ardc_nopred",],
  "arin:pred - no pred" = contrast_matrix["arin_pred",] -
    contrast_matrix["arin_nopred",],
  "orop:pred - no pred" = contrast_matrix["orop_pred",] -
    contrast_matrix["orop_nopred",],
  "quar:pred - no pred" = contrast_matrix["quar_pred",] -
    contrast_matrix["quar_nopred",],
  "turu:pred - no pred" = contrast_matrix["turu_pred",] -
    contrast_matrix["turu_nopred",],
  "guan:pred - no pred" = contrast_matrix["guan_pred",] -
    contrast_matrix["guan_nopred",]
)

Univariate fit and plots

Shape only

  1. Model fits to each coordinate
pred_lm <- list()
pred_lm_emm <- list()
pred_lm_pairs <- list()
for(j in 1:length(lm_cols_x13)){
  # model fit
  form_j <- paste0(lm_cols_x13[j],
                   " ~ stream_id * pred") %>%
    formula()
  m1 <- lm(form_j, data = pred_f2)

  # inference

  m1_emm <- emmeans(m1,
                    specs = c("stream_id", "pred"))
  m1_pairs <- contrast(m1_emm,
                       method = pred_pairs,
                       adjust = "none") %>%
    summary(infer = TRUE)
  
  # save to list
  pred_lm[[length(pred_lm)+1]] <- m1
  pred_lm_emm[[length(pred_lm_emm)+1]] <- m1_emm
  pred_lm_pairs[[length(pred_lm_pairs)+1]] <- m1_pairs
}
names(pred_lm) <- lm_cols_x13
names(pred_lm_emm) <- lm_cols_x13
names(pred_lm_pairs) <- lm_cols_x13
  1. plot univariate models
pred_gg <- list()
for(j in 1:length(lm_cols_x13)){
  m1 <- pred_lm[[j]]
  m1_emm <- pred_lm_emm[[j]]
  m1_pairs <- pred_lm_pairs[[j]]
  gg <- ggplot_the_response(m1, m1_emm, m1_pairs,
                            y_label = lm_cols_x13[j])
  pred_gg[[length(pred_gg)+1]] <- gg
}
names(pred_gg) <- lm_cols_x13

plot_grid(pred_gg[[1]],
          pred_gg[[2]], ncol = 2)

plot_grid(pred_gg[[3]],
          pred_gg[[4]], ncol = 2)

plot_grid(pred_gg[[5]],
          pred_gg[[6]], ncol = 2)

plot_grid(pred_gg[[7]],
          pred_gg[[8]], ncol = 2)

plot_grid(pred_gg[[9]],
          pred_gg[[10]], ncol = 2)

plot_grid(pred_gg[[11]],
          pred_gg[[12]], ncol = 2)

plot_grid(pred_gg[[13]],
          pred_gg[[14]], ncol = 2)

plot_grid(pred_gg[[15]],
          pred_gg[[16]], ncol = 2)

plot_grid(pred_gg[[17]],
          pred_gg[[18]], ncol = 2)

plot_grid(pred_gg[[19]],
          pred_gg[[20]], ncol = 2)

plot_grid(pred_gg[[21]],
          pred_gg[[22]], ncol = 2)

plot_grid(pred_gg[[23]],
          pred_gg[[24]], ncol = 2)

plot_grid(pred_gg[[25]])

Shape adjusted for size (ANCOVA)

  1. Fit univariate lm models with size as covariate
pred_lm_adj <- list()
pred_lm_adj_emm <- list()
pred_lm_adj_pairs <- list()
for(j in 1:length(lm_cols_x13)){
  # model fit
  form_j <- paste0(lm_cols_x13[j],
                   " ~ stream_id * pred + median_size") %>%
    formula()
  m1 <- lm(form_j, data = pred_f2)

  # inference

  m1_emm <- emmeans(m1,
                    specs = c("stream_id", "pred"))
  m1_pairs <- contrast(m1_emm,
                       method = pred_pairs,
                       adjust = "none") %>%
    summary(infer = TRUE)
  
  # save to list
  pred_lm_adj[[length(pred_lm_adj)+1]] <- m1
  pred_lm_adj_emm[[length(pred_lm_adj_emm)+1]] <- m1_emm
  pred_lm_adj_pairs[[length(pred_lm_adj_pairs)+1]] <- m1_pairs
}
names(pred_lm_adj) <- lm_cols_x13
names(pred_lm_adj_emm) <- lm_cols_x13
names(pred_lm_adj_pairs) <- lm_cols_x13
  1. Plot univariate models
pred_gg <- list()
for(j in 1:length(lm_cols_x13)){
  m1 <- pred_lm_adj[[j]]
  m1_emm <- pred_lm_adj_emm[[j]]
  m1_pairs <- pred_lm_adj_pairs[[j]]
  gg <- ggplot_the_response(m1, m1_emm, m1_pairs,
                            y_label = lm_cols_x13[j])
  pred_gg[[length(pred_gg)+1]] <- gg
}
names(pred_gg) <- lm_cols_x13

plot_grid(pred_gg[[1]],
          pred_gg[[2]], ncol = 2)

plot_grid(pred_gg[[3]],
          pred_gg[[4]], ncol = 2)

plot_grid(pred_gg[[5]],
          pred_gg[[6]], ncol = 2)

plot_grid(pred_gg[[7]],
          pred_gg[[8]], ncol = 2)

plot_grid(pred_gg[[9]],
          pred_gg[[10]], ncol = 2)

plot_grid(pred_gg[[11]],
          pred_gg[[12]], ncol = 2)

plot_grid(pred_gg[[13]],
          pred_gg[[14]], ncol = 2)

plot_grid(pred_gg[[15]],
          pred_gg[[16]], ncol = 2)

plot_grid(pred_gg[[17]],
          pred_gg[[18]], ncol = 2)

plot_grid(pred_gg[[19]],
          pred_gg[[20]], ncol = 2)

plot_grid(pred_gg[[21]],
          pred_gg[[22]], ncol = 2)

plot_grid(pred_gg[[23]],
          pred_gg[[24]], ncol = 2)

plot_grid(pred_gg[[25]])

pred_gg <- list()
for(j in 1:length(lm_cols_x13)){
  m1 <- pred_lm_adj[[j]]
  gg <- ggplot(data = pred_f2,
               aes(x = median_size,
                   y = get(lm_cols_x13[j]),
                   color = stream_id)) +
    geom_point() +
    geom_ancova_grouped(m1, group_by = "pred") +
    ylab(lm_cols_x13[j]) +
    scale_color_manual(values = pal_okabe_ito_blue) +
    NULL
  
  pred_gg[[length(pred_gg)+1]] <- gg
}
names(pred_gg) <- lm_cols_x13

plot_grid(pred_gg[[1]],
          pred_gg[[2]], ncol = 2)

plot_grid(pred_gg[[3]],
          pred_gg[[4]], ncol = 2)

plot_grid(pred_gg[[5]],
          pred_gg[[6]], ncol = 2)

plot_grid(pred_gg[[7]],
          pred_gg[[8]], ncol = 2)

plot_grid(pred_gg[[9]],
          pred_gg[[10]], ncol = 2)

plot_grid(pred_gg[[11]],
          pred_gg[[12]], ncol = 2)

plot_grid(pred_gg[[13]],
          pred_gg[[14]], ncol = 2)

plot_grid(pred_gg[[15]],
          pred_gg[[16]], ncol = 2)

plot_grid(pred_gg[[17]],
          pred_gg[[18]], ncol = 2)

plot_grid(pred_gg[[19]],
          pred_gg[[20]], ncol = 2)

plot_grid(pred_gg[[21]],
          pred_gg[[22]], ncol = 2)

plot_grid(pred_gg[[23]],
          pred_gg[[24]], ncol = 2)

plot_grid(pred_gg[[25]])

Plot of effect of predator on body shape

Shape only

  1. Group means
pred_group_means <- data.table()
for(j in 1:length(lm_cols_x13)){
  m1_emm <- pred_lm_emm[[j]] %>%
    summary()
  pred_group_means <- rbind(
    pred_group_means,
    data.table(coord = lm_cols_x13[j],
               m1_emm)
  )
}
pred_group_means[, coord := factor(coord, levels = lm_cols_x13)]

pred_group_means[, axis := substr(coord, 1, 1)]
pred_group_means[, lm := substr(coord, 2, nchar(as.character(coord)))]
pred_group_means[, lm := factor(lm,
                              levels = as.character(1:13))]

# cast to wide with x and y cols for each landmark
pred_group_means_wide <- dcast(pred_group_means,
                   pred + stream_id + lm ~ axis,
                   value.var = "emmean")

pred_group_means_wide[lm == "13", y := 0]

pred_treatment_means_wide <- pred_group_means_wide[, .(
  x = mean(x),
  y = mean(y)),
  by = .(pred, lm)]

# replace eye with lm1 in treatment means table
pred_treatment_means_wide[lm == "12",
                              x := pred_treatment_means_wide[lm == "1", x]]
pred_treatment_means_wide[lm == "12",
                              y := pred_treatment_means_wide[lm == "1", y]]
  1. Plot effect of predator
gg1 <- ggplot(data = pred_group_means_wide,
              aes(x = x,
                  y = y,
                  color = pred)) +
  geom_point(size = 1) +
  geom_path(data = pred_treatment_means_wide[lm != "13"],
             aes(color = pred)) +
  coord_fixed() +
  theme_pubr() +
  scale_color_manual(values = pal_okabe_ito_blue) +
  NULL
gg1
Figure 6. Model mean shape coordinates

Figure 6. Model mean shape coordinates

fig_cap <- "Model mean shape coordinates"
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}

Shape adjusted for size

  1. Size adjusted group means
pred_group_means_adj <- data.table()
for(j in 1:length(lm_cols_x13)){
  m1_emm <- pred_lm_adj_emm[[j]] %>%
    summary()
  pred_group_means_adj <- rbind(
    pred_group_means_adj,
    data.table(coord = lm_cols_x13[j],
               m1_emm)
  )
}
pred_group_means_adj[, coord := factor(coord, levels = lm_cols_x13)]

pred_group_means_adj[, axis := substr(coord, 1, 1)]
pred_group_means_adj[, lm := substr(coord, 2, nchar(as.character(coord)))]
pred_group_means_adj[, lm := factor(lm,
                              levels = as.character(1:13))]

# cast to wide with x and y cols for each landmark
pred_group_means_wide_adj <- dcast(pred_group_means_adj,
                   pred + stream_id + lm ~ axis,
                   value.var = "emmean")

pred_group_means_wide_adj[lm == "13", y := 0]

pred_treatment_means_wide_adj <- pred_group_means_wide_adj[, .(
  x = mean(x),
  y = mean(y)),
  by = .(pred, lm)]

# replace eye with lm1 in treatment means table
pred_treatment_means_wide_adj[lm == "12",
                              x := pred_treatment_means_wide_adj[lm == "1", x]]
pred_treatment_means_wide_adj[lm == "12",
                              y := pred_treatment_means_wide_adj[lm == "1", y]]
  1. Plot size adjusted effect of pred
gg2 <- ggplot(data = pred_group_means_wide_adj,
              aes(x = x,
                  y = y,
                  color = pred)) +
  geom_point(size = 1) +
  geom_path(data = pred_treatment_means_wide_adj[lm != "13"],
             aes(color = pred)) +
  coord_fixed() +
  theme_pubr() +
  scale_color_manual(values = pal_okabe_ito_blue) +
  NULL
gg2
Figure 7. Model mean shape coordinates

Figure 7. Model mean shape coordinates

fig_cap <- "Model mean shape coordinates"
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}

Combined plots

gg <- plot_grid(gg1, gg2, nrow = 2)
gg
Figure 8. A. Model mean shape coordinates for No Pred and Pred treatments. B. Size-adjusted mean shape coordinates for No Pred and Pred treatments using median size as the covariate.

Figure 8. A. Model mean shape coordinates for No Pred and Pred treatments. B. Size-adjusted mean shape coordinates for No Pred and Pred treatments using median size as the covariate.

fig_cap <- "A. Model mean shape coordinates for No Pred and Pred treatments. B. Size-adjusted mean shape coordinates for No Pred and Pred treatments using median size as the covariate."
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}

pred scores

Shape only

  1. compute pred scores as vector of difference pred - no_pred
# get treament means as means of group means
pred_treatment_means <- pred_group_means[, .(emmean = mean(emmean)),
                                         by = .(coord, pred)]

pred_loadings <- dcast(pred_treatment_means,
                         coord ~ pred,
                         value.var = "emmean"
                         )
colnames(pred_loadings) <- c("lm", "pred", "no_pred")
# d is the raw difference
pred_loadings[,  d := pred - no_pred]
# e is d scaled by length of d. This is like an eigenvector
pred_loadings[, e := d/sqrt(c(t(d) %*% d))]

# note - high pred fish have more negative scores so reverse direction
# so that high scores = high pred
pred_loadings[, e := -e]

e <- c(pred_loadings[, e])
t(e) %*% e # check to see if equal to one
##      [,1]
## [1,]    1
# n x 2p matrix of coordinates
X <- pred_f2[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
pred_f2[, pred_score := (X %*% e)[,1]]
mean_pred_score <- mean(pred_f2[, pred_score]) # use for wild pred score
pred_f2[, pred_score := pred_score - mean(pred_score)]

# n_stream_id x 2p matrix of coordinates + pred_score
pred_f2_group_means_wide <- pred_f2[, lapply(.SD, mean),
                                    by = .(stream_id, treatment),
                                    .SDcols = c(lm_cols_x13, "pred_score") ] 
  1. Variance of pred score relative to total variance
total_var <- sum(diag(cov(pred_f2[, .SD, .SDcols = lm_cols_x13])))
pred_var <- sd(pred_f2[, pred_score])^2
pred_variance_frac <- pred_var/total_var
pred_variance_frac
## [1] 0.136145
  1. Influence of each coordinate on pred score. Correlation between coordinates of stream x treatment mean and pred_score.
ycols <- c("pred_score", lm_cols_x13)
r <- cor(pred_f2_group_means_wide[, .SD, .SDcols = ycols])[-1,1]
pred_loadings[, r := r]
pred_loadings %>%
  kable (digits = c(1,4,4,4,2,2)) %>%
  kable_styling(full_width = FALSE)
lm pred no_pred d e r
x1 -0.0193 0.0026 -0.0219 0.33 0.48
y1 -0.0229 -0.0122 -0.0107 0.16 0.78
x2 0.5655 0.5770 -0.0115 0.17 0.30
y2 0.1924 0.1966 -0.0042 0.06 -0.11
x3 1.4831 1.4788 0.0043 -0.07 -0.11
y3 0.2628 0.2525 0.0103 -0.15 -0.56
x4 1.7680 1.7459 0.0221 -0.33 -0.25
y4 0.2054 0.2050 0.0004 -0.01 -0.22
x5 2.3823 2.3922 -0.0100 0.15 0.21
y5 0.1804 0.1954 -0.0150 0.22 0.68
x6 2.4733 2.4654 0.0078 -0.12 -0.45
y6 0.0019 0.0109 -0.0090 0.14 0.62
x7 2.3522 2.3703 -0.0181 0.27 0.50
y7 -0.1973 -0.1914 -0.0059 0.09 0.63
x8 1.6947 1.6911 0.0036 -0.05 -0.01
y8 -0.2293 -0.2406 0.0113 -0.17 -0.60
x9 1.4610 1.4590 0.0020 -0.03 -0.28
y9 -0.3130 -0.3245 0.0115 -0.17 -0.71
x10 1.1466 1.1464 0.0001 0.00 -0.01
y10 -0.3655 -0.3602 -0.0053 0.08 0.28
x11 0.4813 0.4800 0.0013 -0.02 -0.10
y11 -0.2817 -0.2900 0.0083 -0.12 -0.41
x12 0.2256 0.2205 0.0052 -0.08 -0.14
y12 -0.0082 -0.0109 0.0028 -0.04 0.07
x13 3.2333 3.2763 -0.0430 0.65 0.84
  1. Fit a model to the pred scores
pred_score_lm1 <- lm(pred_score ~ stream_id * pred,
                      data = pred_f2)
ggcheck_the_model(pred_score_lm1) # increased variance in high pred groups

pred_score_lm1_emm <- emmeans(pred_score_lm1,
                              specs = c("stream_id", "pred"))

pred_score_lm1_pairs <- contrast(pred_score_lm1_emm,
                                 method = pred_pairs,
                                 adjust = "none"
                                 )

Shape adjusted for size

  1. compute pred scores as vector of difference pred - no_pred
# get treament means as means of group means
pred_treatment_means_adj <- pred_group_means_adj[, .(emmean = mean(emmean)),
                                         by = .(coord, pred)]

pred_loadings_adj <- dcast(pred_treatment_means_adj,
                         coord ~ pred,
                         value.var = "emmean"
                         )
colnames(pred_loadings_adj) <- c("lm", "pred", "no_pred")
# d is the raw difference
pred_loadings_adj[,  d := pred - no_pred]
# e is d scaled by length of d. This is like an eigenvector
pred_loadings_adj[, e := d/sqrt(c(t(d) %*% d))]

# note - high pred fish have more negative scores so reverse direction
# so that high scores = high pred
pred_loadings_adj[, e := -e]

e <- c(pred_loadings_adj[, e])
t(e) %*% e # check to see if equal to one
##      [,1]
## [1,]    1
# n x 2p matrix of coordinates
X <- pred_f2[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
pred_f2[, pred_score_adj := (X %*% e)[,1]]
pred_f2[, pred_score_adj := pred_score_adj - mean(pred_score_adj)]

# n_stream_id x 2p matrix of coordinates + pred_score
pred_f2_group_means_wide_adj <- pred_f2[, lapply(.SD, mean),
                                    by = .(stream_id, treatment),
                                    .SDcols = c(lm_cols_x13, "pred_score_adj") ] 
  1. Variance of pred score relative to total variance
total_var <- sum(diag(cov(pred_f2[, .SD, .SDcols = lm_cols_x13])))

pred_var <- sd(pred_f2[, pred_score_adj])^2
pred_variance_frac <- pred_var/total_var
pred_variance_frac
## [1] 0.1093738
  1. Influence of each coordinate on pred score. Correlation between coordinates of stream x treatment mean and pred_score.
ycols <- c("pred_score_adj", lm_cols_x13)
r <- cor(pred_f2_group_means_wide_adj[, .SD, .SDcols = ycols])[-1,1]
pred_loadings_adj[, r := r]
pred_loadings_adj %>%
  kable (digits = c(1,4,4,4,2,2)) %>%
  kable_styling(full_width = FALSE)
lm pred no_pred d e r
x1 -0.0200 0.0061 -0.0262 0.42 0.65
y1 -0.0235 -0.0096 -0.0139 0.22 0.66
x2 0.5681 0.5648 0.0033 -0.05 0.11
y2 0.1927 0.1952 -0.0024 0.04 -0.19
x3 1.4828 1.4806 0.0021 -0.03 -0.40
y3 0.2622 0.2553 0.0069 -0.11 -0.45
x4 1.7708 1.7326 0.0381 -0.61 -0.72
y4 0.2048 0.2078 -0.0030 0.05 -0.03
x5 2.3825 2.3913 -0.0088 0.14 0.58
y5 0.1807 0.1944 -0.0137 0.22 0.56
x6 2.4725 2.4689 0.0036 -0.06 0.02
y6 0.0024 0.0083 -0.0058 0.09 0.48
x7 2.3518 2.3726 -0.0208 0.33 0.73
y7 -0.1970 -0.1931 -0.0039 0.06 0.51
x8 1.6946 1.6918 0.0028 -0.04 -0.37
y8 -0.2289 -0.2426 0.0137 -0.22 -0.47
x9 1.4604 1.4617 -0.0012 0.02 -0.45
y9 -0.3129 -0.3252 0.0123 -0.20 -0.55
x10 1.1456 1.1512 -0.0057 0.09 -0.14
y10 -0.3655 -0.3603 -0.0052 0.08 0.35
x11 0.4817 0.4781 0.0037 -0.06 -0.05
y11 -0.2822 -0.2876 0.0053 -0.09 -0.15
x12 0.2253 0.2220 0.0033 -0.05 -0.06
y12 -0.0081 -0.0115 0.0034 -0.05 -0.13
x13 3.2378 3.2546 -0.0168 0.27 0.42
  1. Fit a model to the pred scores
pred_score_adj_lm1 <- lm(pred_score_adj ~ stream_id * pred,
                      data = pred_f2)
ggcheck_the_model(pred_score_adj_lm1) # increased variance in high pred groups

pred_score_adj_lm1_emm <- emmeans(pred_score_adj_lm1,
                              specs = c("stream_id", "pred"))

pred_score_adj_lm1_pairs <- contrast(pred_score_adj_lm1_emm,
                                 method = pred_pairs,
                                 adjust = "none"
                                 )

Combined plot

Plot pred score by stream_id:treatment combinations

gg1 <- ggplot_the_response(pred_score_lm1, pred_score_lm1_emm, pred_score_lm1_pairs,
                    y_label = "Pred Score")
gg2 <- ggplot_the_response(pred_score_adj_lm1,
                           pred_score_adj_lm1_emm,
                           pred_score_adj_lm1_pairs,
                    y_label = "Pred Score")
plot_grid(gg1, gg2, ncol = 2)
Figure 9. Pred scores

Figure 9. Pred scores

fig_cap <- "Pred scores"
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}

Shape vs. size-adjusted shape scores

qplot(x = pred_loadings[, e], y = pred_loadings_adj[, e])

The outlier is

pred_loadings[e > 0.6]
##     lm     pred  no_pred           d         e         r
## 1: x13 3.233285 3.276304 -0.04301822 0.6451447 0.8353839

Static allometry of pred score

  1. treatment effect on size
lm1 <- lm(median_size ~ pred * stream_id,
         data = pred_f2)
lm2 <- lm(median_size ~ pred + stream_id,
         data = pred_f2)

coef(summary(lm2)) %>%
  kable(digits = c(3,3,2,5)) %>%
  kable_styling()
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.910 0.020 44.83 0.00000
predFlow + pike -0.057 0.013 -4.38 0.00003
stream_idarin -0.039 0.023 -1.68 0.09466
stream_idorop 0.051 0.022 2.31 0.02285
stream_idquar 0.012 0.023 0.54 0.59125
stream_idturu 0.088 0.025 3.54 0.00057
stream_idguan -0.191 0.030 -6.26 0.00000
  1. Standard ANCOVA model. Problem: median size is a post-treatment measure and the estimate of the treatment effect is biased.
lmm_4 <- lmer(pred_score ~ pred + median_size +
               (1|stream_id) +
               (1|stream_id:pred),
         data = pred_f2)

lmm_4_emm <- emmeans(lmm_4, specs = "pred")

coef(summary(lmm_4)) %>%
  kable(digits = c(3,3,1,1,8)) %>%
  kable_styling()
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.154 0.040 72.1 3.8 0.00027858
predFlow + pike 0.051 0.007 122.8 7.1 0.00000000
median_size -0.206 0.043 85.8 -4.7 0.00000869
m1 <- lm(pred_score ~ pred*stream*median_size, data = pred_f2)

gg <- ggplot(data = pred_f2,
             aes(x = median_size,
                 y = pred_score,
                 color = stream_id,
                 shape = pred)) +
  geom_ancova_grouped(m1, group_by = "pred") +
  geom_point(size = 2) +
  scale_shape_manual(values=c(16, 1)) +
  scale_color_manual(values = pal_okabe_ito_blue) +
  theme_pubr() +
  NULL

gg

m2 <- lm(pred_score ~ pred*stream + median_size, data = pred_f2)

gg <- ggplot(data = pred_f2,
             aes(x = median_size,
                 y = pred_score,
                 color = stream_id,
                 shape = pred)) +
  geom_ancova_grouped(m2, group_by = "pred") +
  geom_point(size = 2) +
  scale_shape_manual(values=c(16, 1)) +
  scale_color_manual(values = pal_okabe_ito_blue) +
  theme_pubr() +
  NULL

gg

Stream vs. flow environment variances

type3 <- list(pred = contr.sum,
              stream = contr.sum)

m1 <- lm(pred_score ~ pred*stream*median_size,
         contrasts = type3,
         data = pred_f2)
Anova(m1, type = "3")
## Anova Table (Type III tests)
## 
## Response: pred_score
##                           Sum Sq  Df F value Pr(>F)
## (Intercept)             0.000458   1  0.3078 0.5803
## pred                    0.000008   1  0.0056 0.9407
## stream                  0.003611   5  0.4858 0.7862
## median_size             0.000217   1  0.1457 0.7035
## pred:stream             0.004666   5  0.6278 0.6789
## pred:median_size        0.000028   1  0.0190 0.8907
## stream:median_size      0.003576   5  0.4810 0.7897
## pred:stream:median_size 0.004611   5  0.6202 0.6846
## Residuals               0.151643 102
m2 <- lm(pred_score ~ pred*stream + median_size,
         contrasts = type3,
         data = pred_f2)
Anova(m2, type = "3")
## Anova Table (Type III tests)
## 
## Response: pred_score
##               Sum Sq  Df F value    Pr(>F)    
## (Intercept) 0.030148   1 21.3895 1.004e-05 ***
## pred        0.054230   1 38.4756 9.357e-09 ***
## stream      0.033070   5  4.6926 0.0006246 ***
## median_size 0.030432   1 21.5913 9.191e-06 ***
## pred:stream 0.003227   5  0.4580 0.8067318    
## Residuals   0.159269 113                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 <- lm(pred_score ~ pred + stream + median_size,
         contrasts = type3,
         data = pred_f2)
Anova(m3, type = "3")
## Anova Table (Type III tests)
## 
## Response: pred_score
##               Sum Sq  Df F value    Pr(>F)    
## (Intercept) 0.030117   1 21.8698 7.827e-06 ***
## pred        0.061804   1 44.8800 7.530e-10 ***
## stream      0.040212   5  5.8401 7.412e-05 ***
## median_size 0.030591   1 22.2145 6.729e-06 ***
## Residuals   0.162496 118                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Genetics vs. Pred variances

pred_levels <- c("Flow", "Flow + pike") # get only greenhouse
pred_f0 <- shape[treatment %in% pred_levels | (dataset == "garden" & gen == "f0")]
# add f0 to flow treatment
pred_f0[, pred := treatment]
pred_f0[gen == "f0", pred := "F0"]
pred_f0[, pred := factor(pred, levels = c("F0", pred_levels))]


stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
pred_f0[, stream_id := factor(stream_id, levels = stream_id_levels)]
pred_f0[, pred_state := factor(pred_state, levels = pred_state_levels)]

# pred score
e <- c(pred_loadings[, e])
# n x 2p matrix of coordinates
X <- pred_f0[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
pred_f0[, pred_score := (X %*% e)[,1]]
pred_f0[, pred_score := pred_score - mean(pred_score)]

# flow score adjusted for median size
e <- c(pred_loadings_adj[, e])
# n x 2p matrix of coordinates
X <- pred_f0[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
pred_f0[, pred_score_adj := (X %*% e)[,1]]
pred_f0[, pred_score_adj := pred_score_adj - mean(pred_score_adj)]
# View(pred_score_adj[, .SD, .SDcols = c("dataset", "stream_id", "gen", "flow", "treatment")])
  1. Contrasts matrix
contrast_matrix <- diag(18)
stream_ids <- paste0(levels(pred_f2$stream_id), "_")
pred_levels <- c("f0", "nopred", "pred" )
contrast_labels <- do.call(paste0, expand.grid(stream_ids, pred_levels))

row.names(contrast_matrix) <- contrast_labels
pred_pairs <- list(
  "ardc:pred - no pred" = contrast_matrix["ardc_pred",] -
    contrast_matrix["ardc_nopred",],
  "arin:pred - no pred" = contrast_matrix["arin_pred",] -
    contrast_matrix["arin_nopred",],
  "orop:pred - no pred" = contrast_matrix["orop_pred",] -
    contrast_matrix["orop_nopred",],
  "quar:pred - no pred" = contrast_matrix["quar_pred",] -
    contrast_matrix["quar_nopred",],
  "turu:pred - no pred" = contrast_matrix["turu_pred",] -
    contrast_matrix["turu_nopred",],
  "guan:pred - no pred" = contrast_matrix["guan_pred",] -
    contrast_matrix["guan_nopred",]
)
  1. Fit a model to the pred scores
pred_score_f0_lm1 <- lm(pred_score_adj ~ stream_id * pred,
                      data = pred_f0)

pred_score_f0_lm1_emm <- emmeans(pred_score_f0_lm1,
                              specs = c("stream_id", "pred"))

pred_score_f0_lm1_pairs <- contrast(pred_score_f0_lm1_emm,
                                 method = pred_pairs,
                                 adjust = "none"
                                 )
  1. Plot of predator score vs. F0, Flow, Flow + Pike
gg3 <- ggplot_the_response(pred_score_f0_lm1, pred_score_f0_lm1_emm, pred_score_f0_lm1_pairs,
                    y_label = "Pred Score")
plot_grid(gg3)
Figure 10. Pred scores

Figure 10. Pred scores

fig_cap <- "Pred scores"
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
}else{
  cap <- capFig(fig_cap)
}

Wild caught guppies

wild <- shape[dataset == "wild"]

Score by site figure

score_by_site <- function(
  fig_dat,
  score, # the y variable
  y_label = "score",
  clickable = TRUE
  ){
  
  fig_dat <- orderBy(~ pred_state + drainage, fig_dat)
  fig_dat[, stream_id := factor(stream_id,
                                 levels = unique(stream_id))]

  site_n <- fig_dat[, .(N = .N),
                               by = .(stream_id, pred_state, drainage)]
  site_sets <- site_n[, .(N = .N),
                               by = .(pred_state, drainage)]
  site_sets[, pos_x := cumsum(N) + 0.5]

  pd <- position_dodge(0.8)
  gg <- ggplot(data = fig_dat,
               aes(x = stream_id,
                   y = get(score),
                   color = pred_state,
                   shape = drainage)) +
    scale_color_manual(values = pal_okabe_ito_blue) +
    # geom_point(position = pd) +
    geom_point_interactive(aes(tooltip = stream_id,
                               data_id = stream_id),
                           position = pd) +
    geom_vline(xintercept = site_sets[, pos_x],
               linetype = "dashed") +
    geom_vline(xintercept = c(8.5, 16.5, 19.5),
               size = 1.5) +
    ylab(y_label) +
    theme_pubr() +
    theme(axis.text.x = element_blank()) +
    
    NULL
  
  # gg

  # if(clickable == TRUE){
  #   gg <- gg + geom_point_interactive(aes(tooltip = stream_id,
  #                              data_id = stream_id),
  #                          position = pd)
  # }else{
  #   gg <- gg + geom_point(position = pd)
  # }
  
  return(gg)
}

Flow score distribution among wild sites

Y <- wild[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
e <- flow_loadings_adj[1:25, e]
wild[, flow_score := (Y %*% e)[, 1]]
wild[, flow_score := flow_score - mean(flow_score)]
gg <- score_by_site(wild,
                    score = "flow_score",
                    y_label = "Flow score")
# gg

fig_cap <- "Flow Scores for wild samples. Positive is high flow shape."
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
  gg_print <- gg
}else{
  cap <- capFig(fig_cap)
  gg_print <- girafe(ggobj = gg)
}

gg_print

Figure 11. Flow Scores for wild samples. Positive is high flow shape.

Pred score distribution among wild sites

Y <- wild[, .SD, .SDcols = lm_cols_x13] %>%
  as.matrix()
e <- pred_loadings_adj[1:25, e]
wild[, pred_score := (Y %*% e)[, 1]]
wild[, pred_score := pred_score - mean(pred_score)]
gg <- score_by_site(wild,
                    score = "pred_score",
                    y_label = "Predator score")
# gg

fig_cap <- "Predator Scores for wild samples. Positive is high predator shape."
if(is.null(getOption("knitr.in.progress"))){
  cap <- fig_cap
  gg_print <- gg
}else{
  cap <- capFig(fig_cap)
  gg_print <- girafe(ggobj = gg)
}

gg_print

Figure 12. Predator Scores for wild samples. Positive is high predator shape.

qplot(x = centroid_size, y = pred_score, data = wild)

qplot(x = centroid_size, y = flow_score, data = wild)